0.1 Background

0.2 Handel Polyclonal infections

Create two objects containing the list of samples that are monoclonals and polyclonals

monoclonals = Pviv_rGenome_object@data$metadata %>% 
  filter(Clonality %in% c('Highly related Polyclonal')) %>%
  select(Sample_id) %>% unlist()

polyclonals = Pviv_rGenome_object@data$metadata %>% 
  filter(Clonality %in% c('Polyclonal')) %>%
  select(Sample_id) %>% unlist()

Create a new rGenome object that only has the information of monoclonals samples

Pviv_rGenome_object_mono = filter_samples(
      Pviv_rGenome_object , 
      v = Pviv_rGenome_object@data$metadata$Sample_id %in% monoclonals)

Calculate the expected heterozygosity assuming that all samples are diploids

Pviv_rGenome_object@data$loci_table$ExpHet_all_as_diploid = 
  get_ExpHet(obj = Pviv_rGenome_object, update_AC = T, monoclonals = NULL, polyclonals = c(monoclonals, polyclonals))

Calculate the expected heterozygosity considering that all samples are monoclonals, it means that secondary alleles won’t be included

Pviv_rGenome_object@data$loci_table$ExpHet_all_as_haploid = 
  get_ExpHet(obj = Pviv_rGenome_object, update_AC = T, monoclonals = c(monoclonals, polyclonals))

Calculate the expected heterozygosity considering only monoclonal samples

Pviv_rGenome_object@data$loci_table$ExpHet_only_monoclonals = 
  get_ExpHet(
    Pviv_rGenome_object_mono, 
    update_AC = T, 
    monoclonals = monoclonals)

Calculate the expected heterozygosity defining the ploidy of each position individually

Pviv_rGenome_object@data$loci_table$ExpHet_default = get_ExpHet(obj = Pviv_rGenome_object)

Calculate the expected heterozygosity defining the ploidy of each samples based on our previous classification. It means that in monoclonal samples secondary alleles will be removed, while in polyclonal samples all observed alleles will be included

Pviv_rGenome_object@data$loci_table$ExpHet_default2 = 
  get_ExpHet(obj = Pviv_rGenome_object, update_AC = T, monoclonals = monoclonals, polyclonals = polyclonals)

Create a plot of ExpHet using the different approaches

Pviv_rGenome_object@data$loci_table %>%
  pivot_longer(cols = c('ExpHet_all_as_haploid', 'ExpHet_only_monoclonals', 'ExpHet_default', 'ExpHet_default2'), names_to = 'Treatment', values_to = 'ExpHet')%>%
  ggplot(aes(x = ExpHet_all_as_diploid, y = ExpHet))+
  geom_point(alpha = .1)+
  facet_grid(.~Treatment)+
  theme_bw()

Create a plot of the distribution of ExpHet by by type of marker

Pviv_rGenome_object@data$loci_table %>%
  pivot_longer(cols = c('ExpHet_all_as_diploid','ExpHet_all_as_haploid', 'ExpHet_only_monoclonals', 'ExpHet_default', 'ExpHet_default2'), names_to = 'Treatment', values_to = 'ExpHet')%>%
  ggplot(aes(fill = Treatment, x = ExpHet))+
  geom_histogram(alpha = .4)+
  facet_grid(TypeOf_Markers~Treatment, scales = 'free_y')+
  theme_bw()

Calculate Heterozygosity by each population (Subnational_level0: Regions within Colombia)

ExpHet_by_Pop= 
  get_ExpHet(obj = Pviv_rGenome_object, update_AC = T, monoclonals = NULL, polyclonals = NULL, by = 'Country')
ExpHet_by_Pop %>%
  pivot_longer(cols = c('Perú', 'Venezuela'), names_to = 'Region', values_to = 'ExpHet')%>%
  ggplot(aes(fill = Region, x = ExpHet))+
  geom_histogram(alpha = .4)+
  facet_grid(TypeOf_Markers~Region, scales = 'free_y')+
  theme_bw()

0.3 Nucleotide diversity

Calculate Nuc. Div. by gene by Population (Subnational_level0)

pi_by_gene_by_country = get_nuc_div(Pviv_rGenome_object, monoclonals = monoclonals, polyclonals = polyclonals, gff = '../reference/genes.gff', type_of_region = 'gene', window = NULL, by = 'Country')
mhp_pi_by_gene_by_country = pi_by_gene_by_country %>%
  pivot_longer(cols = c('Perú_pi', 'Venezuela_pi', 'Total_pi'), names_to = 'Regions', values_to = 'pi')%>%
  mutate(CHROM2  = gsub('(^(\\d|P|v)+_|_v1)', '',seqid))%>%
  ggplot(aes(x = start, y = pi, color = Regions)) +
    geom_point(alpha = 0.5, size = .25) +
    scale_color_manual(values = c('dodgerblue3', 'firebrick3', 'gray40'))+
    facet_grid(Regions ~ CHROM2, scales = 'free_x')+
    theme_minimal()+
    labs(y = 'Nuc. Div.', x = 'Chromosomal position', color = 'Region')+
    theme(legend.position = 'right',
          axis.text.x = element_blank(),
          axis.title.x = element_blank())

mhp_pi_by_gene_by_country

mhp_pi_by_gene_by_country = ggplotly(mhp_pi_by_gene_by_country)

chromosomes = c('PvP01_01_v1', 'PvP01_02_v1', 'PvP01_03_v1', 'PvP01_04_v1',
                'PvP01_05_v1', 'PvP01_06_v1', 'PvP01_07_v1', 'PvP01_08_v1',
                'PvP01_09_v1', 'PvP01_10_v1', 'PvP01_11_v1', 'PvP01_12_v1',
                'PvP01_13_v1', 'PvP01_14_v1', 'PvP01_API_v1', 'PvP01_MIT_v1')

for(region in 1:3){
  for(chrom in 1:16){
    mhp_pi_by_gene_by_country$x$data[[chrom + (region - 1)*16]]$text = paste(mhp_pi_by_gene_by_country$x$data[[chrom + (region - 1)*16]]$text,
      pi_by_gene_by_country[pi_by_gene_by_country$seqid == chromosomes[chrom],]$gene_description, sep = '<br />')
  }
}


mhp_pi_by_gene_by_country

0.4 Genetic differentiation

pca_PerVen = fastGRM(obj = Pviv_rGenome_object, k = 2, monoclonals = monoclonals, polyclonals = polyclonals, Pop = 'Country')

pca_PerVen  %>% ggplot(aes(x = PC1, y = PC2, color = Country))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  scale_color_manual(values = c('dodgerblue3', 'firebrick3'))+
  theme_bw()+
  labs(title = 'WGS - PerVen',
       color = 'Regions')+
  theme(legend.position = c(.8, .8))

Genetic Differentiation within Peru

Pviv_rGenome_object_Peru = filter_samples(Pviv_rGenome_object, v = 
                                          Pviv_rGenome_object@data$metadata$Country == 'Perú')

monoclonals_per = Pviv_rGenome_object_Peru@data$metadata[Pviv_rGenome_object_Peru@data$metadata$Clonality == "Highly related Polyclonal",][['Sample_id']]

polyclonals_per = Pviv_rGenome_object_Peru@data$metadata[Pviv_rGenome_object_Peru@data$metadata$Clonality == "Polyclonal",][['Sample_id']]

pca_Per = fastGRM(obj = Pviv_rGenome_object_Peru, k = 2, monoclonals = monoclonals_per, polyclonals = polyclonals_per, Pop = 'Subnational_level0')


pca_Per  %>% ggplot(aes(x = PC1, y = PC2, color = Subnational_level0))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  theme_bw()+
  labs(title = 'WGS - Per',
       color = 'Regions')

Genetic Differentiation within Peru

Pviv_rGenome_object_Ven = filter_samples(Pviv_rGenome_object, v = 
                                          Pviv_rGenome_object@data$metadata$Country == 'Venezuela')

monoclonals_ven = Pviv_rGenome_object_Ven@data$metadata[Pviv_rGenome_object_Ven@data$metadata$Clonality == "Highly related Polyclonal",][['Sample_id']]

polyclonals_ven = Pviv_rGenome_object_Ven@data$metadata[Pviv_rGenome_object_Ven@data$metadata$Clonality == "Polyclonal",][['Sample_id']]

pca_Ven = fastGRM(obj = Pviv_rGenome_object_Ven, k = 2, monoclonals = monoclonals_ven, polyclonals = polyclonals_ven, Pop = 'Subnational_level2')


pca_Ven  %>% ggplot(aes(x = PC1, y = PC2, color = Subnational_level2))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  theme_bw()+
  labs(title = 'WGS - Ven',
       color = 'Regions')